
do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"

*Table of Contents
* Each local below is a 0/1 switch that is tested by an -if- block later in
* this file; a section runs only when its switch equals 1. Indented switches
* are tested inside their parent's block, so they have no effect while the
* parent switch is 0.
* evaas_skinny: writes FOCAL_evaas_skinny.dta.
local evaas_skinny = 0
* masterbuild: writes mb.dta (main), mb_skinny.dta (skinny), mb_clean.dta
* (clean) and mb_PPI.dta (PPI). The skinny and clean steps read mb.dta and
* the PPI step reads mb_clean.dta.
local masterbuild = 0
	local masterbuild_main = 1
	local masterbuild_skinny = 1
	local masterbuild_clean = 1
	local masterbuild_PPI = 1
* school_student_demographics: reads mb_clean.dta.
local school_student_demographics = 0
* curtest: writes curtest.dta.
local curtest = 0
* accommodations: reads curtest.dta; writes eog_tm.dta and test_accom.dta.
local accommodations = 0
* course_membership: main writes crs_mem.dta; the teacher and clean steps read
* it. The clean prep step also reads mb_clean.dta and ncerdc_experience.dta.
local course_membership = 1
	local course_membership_main = 1
	local teacher_school_assign = 1
	local course_membership_clean = 1
		local cm_clean_prep = 1
		local cm_clean_math = 1
		local cm_clean_reading = 1
* discipline: imports the yearly suspension files and flags offense categories.
local discipline = 1

	
*********************************************
*Perform slight cleaning of EVAAS and prepare
* more manageable file.
*********************************************
if `evaas_skinny' == 1 {
	* Reduce the FOCAL teacher EVAAS file to one row per teacher-year, limited
	* to teacher-years that have at least one math (subject M) EVAAS record.
	use "$basedata/FOCAL_teacher_evaas.dta", clear

	* Attach the FOCAL employee crosswalk. Every EVAAS row must find a match
	* (asserted); rows that exist only in the crosswalk are discarded.
	merge m:1 FOCALEmployeeID using "$basedata\FOCAL_employees_crosswalk.dta"
	assert _merge != 1
	drop if _merge != 3
	drop _merge

	rename ncerdc_id teachid

	* Under force, non-numeric score-year values become missing; per the
	* original note these are the 2/3-year averages, which are removed.
	destring evaas_score_year, generate(year) force
	drop if missing(year)

	* Flag teacher-years with any math record and keep only those.
	gen math = (evaas_subject == "M")
	egen evaas_math = max(math), by(teachid year)
	drop if evaas_math != 1

	* NOTE(review): every row of a flagged teacher-year survives to this point,
	* so the index retained by the forced de-duplication is whichever row
	* happens to come first and need not belong to the math record - confirm.
	keep FOCALEmployeeID teachid year index evaas_math
	duplicates drop teachid year, force

	save  "$basedata/FOCAL_evaas_skinny.dta", replace
}

***********************************************
*Aggregate student test and demographic records
***********************************************
if `masterbuild' == 1{
	if `masterbuild_main' == 1{
	* varn: columns coerced to numeric at the end of each yearly pass.
	* keep_vars: the harmonized column set every yearly extract is reduced to.
	local varn " daysmem rd_score rd_cscore ma_score ma_cscore mastid"
	local keep_vars "mastid year grade ma_score  rd_score ma_test_id rd_test_id female ethnic  MIGRANT DISAD disability ENG_LEARN lea schlcode gifted daysmem"

	* One pass per school year. The harmonization is organised by variable
	* rather than by year; each step states which years it applies to.
	forvalues y = 2007/2018 {
		use  "$rawdata/Student/MBuild/mb_`y'.dta", clear

		destring grade, replace force
		* Missing grades compare as larger than 8, so they are dropped as well.
		drop if grade > 8
		gen year = `y'

		* ---- Renames onto the common variable names ----
		if inrange(`y', 2010, 2012) {
			rename *_scoreN *_score
		}
		if inlist(`y', 2009, 2010) {
			rename minority_code ethnic
		}
		if `y' >= 2013 {
			rename pc_*score *score
			rename pc_*_type  *_test_id
			* Free the name ethnic for the numeric recode below.
			rename ethnic ethnicity
		}

		* ---- 2018: daysmem is derived from entry and test dates ----
		if `y' == 2018 {
			* max() ignores a missing argument, so this takes whichever entry
			* date is present, or the later one when both are present.
			gen entry_min = max(date(entry_date, "YMD"), date(entry_date_fdf, "YMD"))
			format entry_min %td
			egen min_entry = min(entry_min), by(lea schlcode mastid)

			gen pc_date_fixed = max(date(pc_ma_date, "YMD"), date(pc_rd_date, "YMD"))
			format pc_date_fixed %td
			egen test_dt_max = max(pc_date_fixed), by(lea schlcode mastid)

			* Days between earliest entry and latest test date, floored at 0.
			gen daysmem = test_dt_max - min_entry
			replace daysmem = 0 if daysmem < 0
		}

		* ---- 2011 onward: letter ethnicity codes mapped to 1-7 ----
		if `y' >= 2011 {
			gen ethnic = .
			local code_num = 0
			foreach letter in I A H B W M P {
				local ++code_num
				replace ethnic = `code_num' if ethnicity == "`letter'"
			}
		}

		* ---- female: same rule in every year ----
		gen female = (sex == "F")

		* ---- gifted ----
		if `y' == 2007 {
			gen gifted = (ec == "01")
		}
		else if `y' == 2008 {
			gen gifted = (aig_math == "Y" | aig_read == "Y")
		}
		else if `y' <= 2012 {
			gen gifted = (aig_m == "Y" | aig_r == "Y")
		}
		else {
			gen gifted = (aig != "N")
		}

		* ---- DISAD: generation is disabled in this file (source variable
		* names are hidden). The disabled rules were: 2007 codes F, R, T;
		* 2008-2012 code Y; 2013-2017 codes 1, 2, 3, 5, 6; 2018 eds_code in
		* 1, 2, 3, 5, 6, 7. keep_vars still expects a DISAD column. ----

		* ---- MIGRANT ----
		if inlist(`y', 2014, 2017) {
			* These years get an empty placeholder for the source column.
			gen migrant = ""
		}
		gen MIGRANT = (migrant == "Y")
		if inrange(`y', 2013, 2017) {
			replace MIGRANT = . if missing(migrant)
		}

		* ---- ENG_LEARN ----
		if `y' <= 2008 {
			gen ENG_LEARN = (lep_current == "Y")
		}
		else if `y' <= 2017 {
			gen ENG_LEARN = (lep == "Y")
		}
		else {
			gen ENG_LEARN = (els == "Y" | els == "1" | els == "2")
		}

		* ---- disability ----
		if `y' == 2007 {
			gen disability = (ec != "NU")
			* ec code 01 is the gifted code in 2007, not a disability.
			replace disability = 0 if ec == "01"
		}
		else if `y' == 2008 {
			gen disability = (ec != "NULL")
		}
		else {
			gen disability = (swd == "Y")
		}

		* Coerce to numeric; capture tolerates columns absent in a given year.
		foreach v in `varn' ethnic {
			capture destring `v', replace force
		}

		keep `keep_vars'

		tempfile temp_mb_`y'
		save `temp_mb_`y'', replace

	}
	* Stack the yearly extracts, then write the combined file once. Saving
	* inside the loop rewrote the growing file after every append without
	* changing the final result.
	clear
	forval y = 2007/2018 {
		append using `temp_mb_`y''
	}
	save "$basedata/mb.dta", replace
	}
	***********************************************
	*Construct skinny file
	***********************************************
	if `masterbuild_skinny' == 1 {
		* Load only the columns needed for the slim student-school-year file.
		use lea schlcode mastid year daysmem ma_score rd_score ma_test_id rd_test_id using "$basedata/mb.dta", clear

		* Replace daysmem by its maximum within student-school-year.
		rename daysmem days
		egen daysmem = max(days), by(mastid lea schlcode year)

		* One row per student-school-year. With force, the first row of each
		* group supplies the scores and test ids that are kept.
		duplicates drop lea schlcode mastid year, force
		drop days

		save  "$basedata/mb_skinny.dta", replace
	}
	***********************************************
	*Perform proper cleaning of masterbuild data 
	* and standardize students' annual test scores
	***********************************************
	if `masterbuild_clean' == 1 {
		* Builds mb_clean.dta: one row per student-year, taken from the school
		* with the most membership days, holding the preferred test score per
		* subject and scores standardized by year, grade and test type.
		use "$basedata/mb.dta", clear

		*1) Carry each characteristic's within-year maximum to all of the
		*   student's records for that year (e.g. DISAD becomes 1 if the
		*   student is flagged at any point during the year).
		foreach v of varlist grade ethnic female gifted DISAD MIGRANT ENG_LEARN disability {
			egen `v'_max = max(`v'), by(mastid year)
			replace `v' = `v'_max if `v'_max != .
			drop `v'_max
		}

		*2) Keep only records from the school where the student spent the most days.
		preserve
			drop if mastid == .
			egen daysmem_max = max(daysmem), by(mastid year)
			keep if daysmem == daysmem_max

			keep lea schlcode year mastid
			duplicates drop

			*A small number of students have the same maximum days at several
			*schools. Break the tie on the lowest lea, then schlcode. The sort
			*keys are given inside bysort so the choice is reproducible: a
			*separate sort followed by bysort on other keys re-sorts the data
			*and leaves the order within a student-year arbitrary.
			bysort mastid year (lea schlcode): keep if _n == 1

			isid mastid year

			tempfile temp_main_school
			save `temp_main_school', replace
		restore

		merge m:1 lea schlcode year mastid using `temp_main_school'
		keep if _merge == 3
		drop _merge

		*3) Get the preferred score for each student/subject in the year and
		*   remove students without scores in the year.
		gen score_rd = rd_score
		gen score_ma = ma_score

		gen rd_test_id_best = ""
		gen ma_test_id_best = ""

		*Tests are listed from least to most preferred. Later tests in the
		*list overwrite earlier ones, so the most preferred test a student
		*took in the year supplies both the score (the maximum on that test)
		*and the test id. One scratch variable is reused for every test
		*instead of creating two new variables per test.
		local reading_tests "EOG CLR3 CLR4 CLR5 CLR6 CLR7 CLR8 X1 X1R3 X1R4 X1R5 X1R6 X1R7 X1R8 X2 X2R1 X2R3 X2R4 X2R5 X2R6 X2R7 X2R8 RD03 RD04 RD05 RD06 RD07 RD08 RG"
		foreach t of local reading_tests {
			egen best_on_test = max(cond(rd_test_id == "`t'", rd_score, .)), by(mastid year)
			replace rd_test_id_best = "`t'" if !missing(best_on_test)
			replace score_rd = best_on_test if !missing(best_on_test)
			drop best_on_test
		}

		local math_tests "EOG CLM3 CLM4 CLM5 CLM6 CLM7 CLM8 X1 X1M3 X1M4 X1M5 X1M6 X1M7 X1M8 X2 X2M3 X2M4 X2M5 X2M6 X2M7 X2M8 MA03 MA04 MA05 MA06 MA07 MA08 RG"
		foreach t of local math_tests {
			egen best_on_test = max(cond(ma_test_id == "`t'", ma_score, .)), by(mastid year)
			replace ma_test_id_best = "`t'" if !missing(best_on_test)
			replace score_ma = best_on_test if !missing(best_on_test)
			drop best_on_test
		}

		drop if mastid == .
		drop if score_rd == . & score_ma == .

		keep grade schlcode lea ethnic mastid year female gifted DISAD MIGRANT ENG_LEARN disability score_rd score_ma rd_test_id_best ma_test_id_best

		rename rd_test_id_best rd_test_id
		rename ma_test_id_best ma_test_id

		*4) Drop duplicate student years; the file must be unique on student-year.
		duplicates drop
		isid mastid year

		*5) Standardize scores by subject within year, grade and test type.
		*   The unstandardized score is kept with an IRT suffix.
		foreach i in ma rd {
			rename score_`i' score_`i'IRT
			egen score_`i' = std(score_`i'IRT), by(year grade `i'_test_id)
		}

		*Persistent DISAD: number of years in which the student is flagged.
		egen DISAD_persist = total(DISAD), by(mastid)
		save  "$basedata/mb_clean.dta", replace
	}
	if `masterbuild_PPI' == 1 {
		* Wide student-level file: one row per student with year-suffixed
		* scores and characteristics plus squared and cubed score terms.
		use "$basedata/mb_clean.dta", clear
		keep grade score_rd score_ma mastid year female gifted DISAD MIGRANT ENG_LEARN disability

		reshape wide score_rd score_ma grade female gifted DISAD MIGRANT ENG_LEARN disability, i(mastid) j(year)

		* Polynomial terms for every year and subject (reading first, then math).
		forvalues yr = 2007/2018 {
			foreach subj in rd ma {
				gen score_`subj'`yr'_squared = score_`subj'`yr'^2
				gen score_`subj'`yr'_cubed = score_`subj'`yr'^3
			}
		}

		save  "$basedata/mb_PPI.dta", replace
	}
}

***********************************************
*Use cleaned test data to derive school-level
* demographics 
***********************************************
if `school_student_demographics' == 1 {
	* School-by-year means of student characteristics and scores, computed
	* from the cleaned student-year file.
	use "$basedata/mb_clean.dta", clear

	* Indicators from the numeric ethnic code: 5 white, 4 black, 3 hispanic.
	local code = 5
	foreach grp in white black hispanic {
		gen `grp' = (ethnic == `code')
		local --code
	}

	collapse (mean) white black hispanic DISAD ENG_LEARN disability gifted score_ma score_rd score_maIRT score_rdIRT, by(schlcode lea year)

	save "$rawdata\ncerdc_school_demo_means.dta", replace
}
	
***********************************************
*Aggregate curtest data.
***********************************************
if `curtest' == 1 {
	* Harmonize the yearly curtest files (2008-2019) and stack them into
	* curtest.dta.
	forval y = 2008/2019 {
		use  "$rawdata\Student\Tests\curtest_pub`y'.dta", clear

		* Year-specific columns that need coercing to numeric.
		if `y' < 2010 {
			destring CSCORE_ERR, replace force
		}
		if `y' < 2014 {
			destring CSCORE, replace force
		}

		* grade: in 2013 and 2014 it is rebuilt from TEST_ID by stripping the
		* listed letters and digits; in all other years the existing column
		* is coerced to numeric.
		if `y' == 2013 | `y' == 2014 {
			destring TEST_ID, gen(grade) ignore(R D M A 1 X 2 A B C S O 0 W I E N G L T H )
		}
		else {
			destring grade, replace force
		}

		* Columns coerced in every year. mastid is included here, so no
		* separate year-specific mastid step is needed.
		foreach i in test_lea test_schl lea schlcode testdt year SCORE ACH_LEVEL mastid {
			destring `i', replace force
		}

		* Drop grades above 8 but keep records with a missing grade.
		* NOTE(review): the second condition used to read grade < 2008, which
		* removed every record with a non-missing grade and left only
		* missing-grade rows. It is read here as a year filter; confirm the
		* intended condition against the downstream accommodations counts.
		drop if (grade > 8 & grade != .) | year < 2008
		compress
		tempfile temp_curtest_`y'
		save `temp_curtest_`y'', replace

	}

	* Stack the yearly files, then write the combined file once.
	clear
	forval y = 2008/2019 {
		append using `temp_curtest_`y''
	}
	save "$basedata/curtest.dta", replace
}

***********************************************
* Determine annual student test accommodations
***********************************************	
if `accommodations' == 1 {
	* ---- Accommodations for 2007/2008, read from the End of Grade files ----
	* One SAS file per tested grade (3-8) and year (07, 08).
	forvalues v = 7/8 {
		forvalues g = 3/8 {
			import sas using "$rawdata\Student\End of Grade\EOG\eog`g'pub0`v'.sas7bdat", clear
			gen test_grd_lvl = `g'
			destring grade, replace
			* Fall back to the grade of the file when grade is missing.
			replace grade = `g' if missing(grade)
			gen year = 200`v'
			capture destring tm*, replace force

			keep mastid year grade tm*
			compress

			tempfile eog_tm_`g'_`v'
			save `eog_tm_`g'_`v''
		}
	}

	* Stack all grade-year pieces.
	clear
	forvalues v = 7/8 {
		forvalues g = 3/8 {
			append using `eog_tm_`g'_`v''
		}
	}
	save "$basedata/eog_tm.dta", replace

	* Math and reading accommodation columns; a student-year is flagged when
	* the row maximum over the subject's columns is positive on any record.
	local tm_m " tmbrailm  tmbprintm  tmtechm  tmkeybdm  tmabacusm tmmagnfym  tmhomem   tmroomm  tmtimem  tm1pagem  tmmultm  tmaloudsm   tmmarkm  tmtransm  tmsignm tmaloudm tmbwritm tmnotifm  "
	local tm_r "tmbrailr  tmbprintr  tmtechr  tmkeybdr  tmabacusr  tmdictatr tmmagnfyr  tmhomer  tmroomr  tmtimer  tm1pager  tmmultr  tmaloudsr  tmmarkr   tmtransr  tmsignr  tmbwritr tmnotifr  "
	egen math_accom = rowmax(`tm_m')
	egen read_accom = rowmax(`tm_r')
	collapse (max) *_accom, by(year mastid)

	tempfile temp_test_accom_0708
	save `temp_test_accom_0708'

	* ---- Accommodations for 2009+, read from the stacked curtest file ----
	use "$basedata/curtest.dta", clear

	* Math tests: MA00, MA03-MA08, X1M0, X1M3-X1M8, MTH1, ACTM.
	* Each inlist stays within the 10-argument limit for strings.
	gen math = inlist(TEST_ID, "MA00", "MA03", "MA04", "MA05", "MA06", "MA07", "MA08") | inlist(TEST_ID, "X1M0", "X1M3", "X1M4", "X1M5", "X1M6", "X1M7", "X1M8") | inlist(TEST_ID, "MTH1", "ACTM")

	* A record counts as accommodated when it lists accommodations or carries
	* the Y flag.
	gen accom = (ACCOMM_LIST != "" & ACCOMM_LIST != "NULL") | ACCOMMODATION == "Y"
	gen math_accom = math * accom

	* Reading tests: RD00, RD03-RD08, X1R0, X1R3-X1R8, RD3A, RD3B, RD3R, ACRD.
	gen read = inlist(TEST_ID, "RD00", "RD03", "RD04", "RD05", "RD06", "RD07", "RD08") | inlist(TEST_ID, "X1R0", "X1R3", "X1R4", "X1R5", "X1R6", "X1R7", "X1R8") | inlist(TEST_ID, "RD3A", "RD3B", "RD3R", "ACRD")
	gen read_accom = read * accom

	collapse (max)  *_accom, by(mastid year)
	* Years before 2009 come from the End of Grade files above. A missing
	* year compares as larger than 2009 and is therefore kept.
	keep if year >= 2009

	tempfile temp_test_accom_0919
	save `temp_test_accom_0919'

	* ---- Combine both periods ----
	use `temp_test_accom_0708', clear
	append using `temp_test_accom_0919'

	save "$basedata/test_accom.dta", replace
}

***********************************************
*Aggregate all course membership records 
* mark classes based on math/reading status
***********************************************
if `course_membership' == 1{
	if `course_membership_main' == 1 {
		* Harmonize the yearly course membership files (grades 3-8), flag each
		* course record as math / reading / self-contained / exceptional from
		* the state course code and the course title, and stack all years
		* into crs_mem.dta.
		forval y = 2007/2018 {
			use "$rawdata/Student/Course Membership/crs_memb_pub`y'.dta", clear
			ren reporting_year year

			* numstudents is rebuilt below for every year, so remove any
			* shipped version first. capture keeps this working both for
			* files that carry the column and for files that do not; dropping
			* it only for the earliest years made the later gen fail with
			* "already defined" whenever a later file also carried it.
			capture drop numstudents

			if `y' <= 2013 {
				rename semester term
			}

			if `y' > 2013 {
				ren coursecode statecourse
				ren period meetingcode
			}

			local vars "grade year meetingcode mastid teachid"
			foreach i in `vars' {
				destring `i', replace force
			}

			* Grades 3-8 with valid ids only. Missing grade compares as
			* larger than 8 and is dropped too.
			drop if grade > 8 | grade < 3 | teachid == . | mastid == .

			*numstudents missing for 2014+. For 2007-2013, numstudents appears to be calculated according to the below command in most cases (ECs like phys ed, art, music are the usual exceptions).
			bysort lea schlcode term year coursetitle statecourse section meetingcode teachid: gen numstudents = _N

			replace coursetitle = lower(coursetitle)
			gen str1 code1 = substr(statecourse,1,1)
			gen str2 code = substr(statecourse,1,2)
			gen str4 code2 = substr(statecourse,1,4)

			* math1: state course code starts with 20-25.
			gen math1 = inlist(code, "20", "21", "22", "23", "24", "25")

			* Title keyword indicators; each variable is named after its
			* keyword and equals 1 when the keyword occurs in the title.
			local mathtext "math alg geo calc "
			local excepttext "study special resource pullout remed enrich indiv except"
			foreach i in `mathtext' `excepttext' mat  {
				gen `i' = strpos(coursetitle, "`i'") > 0
			}

			gen sci = (code1 == "3")
			* math_ct: any math keyword in the title. except1: any keyword
			* marking study hall, resource, remedial and similar classes.
			egen math_ct = rowmax(`mathtext')
			egen except1 = rowmax(`excepttext')

			* Course records per student-teacher-year, and the student's maximum.
			egen sum_tchr = sum(1), by(mastid year teachid)
			egen sum_tchr_max = max(sum_tchr), by(mastid year)

			* math2: math code and math title. math3: additionally not an
			* exceptional class.
			gen math3 = (math1 == 1 & math_ct == 1 & except1 == 0)
			gen math2 = (math1 == 1 & math_ct == 1)
			gen math20 = (code == "20")

			gen self_cont = (code2 == "0000")
			gen read1 = (code == "10")
			gen read2 = (code2 == "1010")
			* read3: read2 excluding exceptional classes.
			gen read3 = read2
			replace read3 = 0 if except1 == 1

			foreach i in ela lit social {
				gen `i' = strpos(coursetitle, "`i'") > 0
			}

			keep mastid teachid year math* self* except*  grade section statecourse meetingcode  birthdt lea schlcode sum* numstudents sci* mat coursetitle read* ela* social* lit* code*
			compress
			tempfile temp_crs_mem_`y'
			save `temp_crs_mem_`y''
		}

		* Stack the yearly files, then write the combined file once.
		clear
		forval y = 2007/2018 {
			append using `temp_crs_mem_`y''
		}
		save "$basedata/crs_mem.dta", replace
	}
	***********************************************
	* Match teachers to schools and mark if they
	* teach math, reading, or self contained classes
	***********************************************
	if `teacher_school_assign' == 1 {
		* Teacher-school-year file with math / reading / self-contained
		* summaries derived from the course membership records.
		use "$basedata/crs_mem.dta", clear

		* For every student-teacher pairing within a school-year, record
		* whether any of their shared course records is of each type.
		foreach kind in math1 read1 self_cont {
			egen `kind'_tchr = max(`kind'), by(mastid year lea schlcode teachid)
		}

		* NOTE(review): collapse takes means, so each *_tchr value is the
		* share of the teacher's course records that belong to such pairings
		* and is not necessarily 0/1 - confirm that downstream code expects
		* a share and not a flag.
		collapse (mean) math1_tchr read1_tchr self_cont_tchr, by(teachid year lea schlcode)

		* Duplicate the keys under the names ncerdc_id and sy.
		gen ncerdc_id = teachid
		gen sy = year
		save "$basedata/teacher_school_assign.dta", replace
	}
	***********************************************
	*Match students to primary math/read teacher
	***********************************************
	if `course_membership_clean' == 1{
		***********************************************
		*Prepare course membership records for 
		* identifying primary math/reading teacher
		* combine with test score data
		***********************************************
		if `cm_clean_prep' == 1 {
			* Course records joined to cleaned student data and the teacher
			* experience file, with student-level course-type flags; the
			* result is the working file used by the math and reading
			* matching steps.
			use "$basedata/crs_mem.dta", clear

			* Student data: mrg_mb == 3 marks course records at the school
			* where the student spent the most time.
			merge m:1 mastid year schlcode lea using "$basedata/mb_clean.dta"
			keep if _merge != 2
			rename _merge mrg_mb

			* Teacher experience data, keyed on sy and ncerdc_id.
			gen ncerdc_id = teachid
			gen sy = year
			merge m:1 sy ncerdc_id using "$basedata/ncerdc_experience.dta"
			keep if _merge != 2
			rename _merge mrgPAY

			drop gross_pay educ_pay_level

			*Keep schools where student spends the most days (and has test data)
			keep if mrg_mb == 3

			* Student-school-year maximum of every course-type flag, i.e.
			* whether the student has any course of that type. summarize
			* only prints each flag's distribution to the log.
			local course_flags "math1 math2 math3 math_ct math20 except1 self_cont mat sci read1 ela lit social read2 read3"
			foreach flag of local course_flags {
				summarize `flag'
				egen `flag'_max = max(`flag'), by(mastid year lea schlcode)
			}
			save "$temp\t.dta", replace
		}
		
		***********************************************
		* Match student to primary math teacher/course
		***********************************************
		if `cm_clean_math' == 1 {
			* Reduce the working file to one course record per student-year:
			* the record of the student's primary math teacher.
			use "$temp\t.dta", clear

			*These students with no identifiable math/science class are mostly only taking ECs (PE, music, visual arts etc.) Drop them.
			drop if math1_max+math2_max+math3_max+math_ct_max==0 & self_cont_max==0 & sci_max==0

			*Categories:
			**(1) Student has high-certainty math teacher
			**(2) Student has self-contained teacher, but no high-certainty math teacher
			**(3) Student has med-certainty math teacher, but no self-contained teacher
			**(4) Student has low-certainty math teacher, but no self-contained teacher
			**(5) Student has no math teacher nor self-contained teacher, but a science teacher

			gen math_teacher_cat = .
			replace math_teacher_cat = 1 if math3_max==1
			replace math_teacher_cat = 2 if math3_max==0 & self_cont_max==1
			replace math_teacher_cat = 3 if math3_max+self_cont_max==0 & math2_max == 1
			replace math_teacher_cat = 4 if math3_max+math2_max+self_cont_max==0 & (math1_max == 1 | math_ct_max == 1)
			replace math_teacher_cat = 5 if math3_max+math2_max+math1_max+math_ct_max+self_cont_max==0 & sci_max==1

			assert math_teacher_cat !=.

			* Row-level test that a course record is NOT a candidate for its
			* student's category. Category 4 admits students through either a
			* math course code or a math title, so a record is a non-candidate
			* only when it has neither. Testing math1 alone removed every
			* record of students who qualify only through the title match.
			local not_cand1 "math3 == 0"
			local not_cand2 "self_cont == 0"
			local not_cand3 "math2 == 0"
			local not_cand4 "math1 == 0 & math_ct == 0"
			local not_cand5 "sci == 0"

			* For each category: drop the non-candidate records, then keep the
			* last candidate per student-year when ordered by bud_obj_teach
			* and numstudents. The sort keys are given inside bysort, with
			* further keys appended, so ties are broken the same way on every
			* run.
			* NOTE(review): missing bud_obj_teach sorts last, so a record with
			* no experience match wins over matched ones - confirm intended.
			forvalues c = 1/5 {
				drop if math_teacher_cat == `c' & (`not_cand`c'')
				bysort mastid year (bud_obj_teach numstudents teachid statecourse section meetingcode): keep if _n == _N | math_teacher_cat != `c'

				unique mastid year if math_teacher_cat == `c'
			}

			unique mastid year

			save  "$basedata/course_mem_clean.dta", replace

		}

		***********************************************
		* Match student to primary reading teacher/course
		***********************************************
		if `cm_clean_reading' == 1 {
			* Reduce the working file to one course record per student-year:
			* the record of the student's primary reading/ELA teacher.
			use "$temp\t.dta", clear

			* Homeroom records carry state course code 9932.
			gen homerm=(code2=="9932")
			egen homerm_max=max(homerm), by(mastid year)

			*These students with no identifiable ela/homeroom/social class are mostly only taking ECs (PE, music, visual arts etc.) Drop them.
			drop if read1_max+read2_max+read3_max==0 & self_cont_max==0 & lit_max==0 & homerm_max ==0 & social_max==0

			*Categories:
			**(1) Student has high-certainty ELA teacher
			**(2) Student has self-contained teacher, but no high-certainty ELA teacher
			**(3) Student has med-certainty ELA teacher, but no self-contained teacher
			**(4) Student has low-certainty ELA teacher, but no self-contained teacher
			**(5) Student has no ELA teacher nor self-contained teacher, but a lit/social/homeroom teacher

			gen read_teacher_cat = .
			replace read_teacher_cat = 1 if read3_max==1
			replace read_teacher_cat = 2 if read3_max==0 & self_cont_max==1
			replace read_teacher_cat = 3 if read3_max+self_cont_max==0 & read2_max == 1
			replace read_teacher_cat = 4 if read3_max+read2_max+self_cont_max==0 & read1_max == 1
			replace read_teacher_cat = 5 if read3_max+read2_max+read1_max+self_cont_max==0 & (lit_max==1 | homerm_max == 1 | social_max == 1)

			assert read_teacher_cat !=.

			* Row-level test that a course record is NOT a candidate for its
			* student's category. Category 5 uses the row-level social flag:
			* the student-level social_max never removes a record for a
			* student who has any social-studies class, which left unrelated
			* course records in the candidate pool.
			local not_cand1 "read3 == 0"
			local not_cand2 "self_cont == 0"
			local not_cand3 "read2 == 0"
			local not_cand4 "read1 == 0"
			local not_cand5 "lit == 0 & homerm == 0 & social == 0"

			* For each category: drop the non-candidate records, then keep the
			* last candidate per student-year when ordered by bud_obj_teach
			* and numstudents. The sort keys are given inside bysort, with
			* further keys appended, so ties are broken the same way on every
			* run.
			* NOTE(review): missing bud_obj_teach sorts last, so a record with
			* no experience match wins over matched ones - confirm intended.
			forvalues c = 1/5 {
				drop if read_teacher_cat == `c' & (`not_cand`c'')
				bysort mastid year (bud_obj_teach numstudents teachid statecourse section meetingcode): keep if _n == _N | read_teacher_cat != `c'

				unique mastid year if read_teacher_cat == `c'
			}

			save  "$basedata/course_mem_clean_rd.dta", replace
		}
		
	}
}

if `discipline' == 1 {
	forval y = 2008/2018{
		import sas using "$rawdata\Student\Suspension\mastsusp`y'.sas7bdat", clear
		
		destring(act1), replace
		destring(act2), replace
		destring(act3), replace
		****************
		* 2008 - 2010
		****************
		if `y' < 2011 {
			* Offense category flags under the pre-2011 action codes. A flag
			* is 1 when any of act1, act2 or act3 carries one of the
			* category's codes. Each category loops over the three action
			* variables so all three are always tested against one code list;
			* the hand-copied lines tested act1 in place of act2/act3 for part
			* of the assault codes and left code 91 out of the act1 conduct
			* list.

			*Conduct Offenses: Disorderly conduct (22), Honor code violation (28), Dress code violation (31), Inappropriate language/disrespect (32), Insubordination (33), Falsification of information (35), Bus misbehavior (37), Inappropriate items on school property (40), disruptive behavior (42),  Misuse of technology (61), Being in an unauthorized area (75), Cell phone use (77), Disrespect to faculty/staff (78), Excessive display of affection (79), Mutual sexual contact between two students (89), Use of counterfeit items (91), Possession of counterfeit items (92)
			gen conduct_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace conduct_offense = 1 if inlist(`a', 22, 28, 31, 32, 33, 35, 37, 40, 42) | inlist(`a', 61, 75, 77, 78, 79, 89, 91, 92)
			}

			*Truancy Offenses: Truancy (30),  Late to class (54), Excessive tardiness (74), Cutting class (76), Leaving school without permission (80), Leaving class without permission (81), Skipping school (82), Skipping class (83)
			gen truancy_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace truancy_offense = 1 if inlist(`a', 30, 54, 74, 76, 80, 81, 82, 83)
			}

			*Assault Offenses: Assault resulting in a serious injury (01), Assault involving the use of a weapon (02), Assault on school personnel not resulting in injury (03), Robbery with a dangerous weapon (10), Robbery without a dangerous weapon (11), Rape (12), Sexual offense (13), Sexual assault not involving rape or sexual offense (14), Taking indecent liberties with a minor (15), Kidnapping (16), Assault on student w/o weapon and not resulting in injury (44), Assault on non-student w/o weapon not resulting in injury (45), Violent assault not resulting in serious injury (60), Assault - other (96), Assault on student (97)
			gen assault_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace assault_offense = 1 if inlist(`a', 1, 2, 3, 12, 13, 14, 15, 16, 44) | inlist(`a', 45, 60, 96, 97, 10, 11)
			}

			*Fighting/Threatening Offenses: Communicating threats (19), Affray (21), Extortion (23), Fighting (24), Aggressive behavior (27), Bomb threat (43), Gang activity (55)
			gen fighting_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace fighting_offense = 1 if inlist(`a', 19, 21, 23, 24, 27, 43, 55)
			}

			*Property Offenses: Unlawfully setting a fire (18), False fire alarm (29), Gambling (34), Theft (36), Property damage (39), Burning of a school building (46)
			gen property_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace property_offense = 1 if inlist(`a', 18, 29, 34, 36, 39, 46)
			}

			*Substance Offenses: Possession of controlled substance - cocaine (05), Possession of controlled substance - marijuana (06), Possession of controlled substance - Ritalin (07), Possession of controlled substance - other (17), Alcohol Possession (20), Possession of tobacco (41), Sale of controlled substance - cocaine (47), Sale of controlled substance - marijuana (48), Sale of controlled substance - Ritalin (49), Sale of controlled substance -other (50), Possession of a prescription drug (84), Distribution of prescription drug (85), Possession of student's own prescription drug (86), Possession of another person's prescription drug (87), Distribution of a prescription drug (88), Use of controlled substances (90), Use of alcoholic beverages (93), Use of narcotics (94), Possession of chemical or drug paraphernalia (95), Use of tobacco (98)
			gen substance_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace substance_offense = 1 if inlist(`a', 5, 6, 7, 17, 20, 41, 47, 48, 49) | inlist(`a', 84, 85, 86, 87, 88, 90, 93, 94, 95) | inlist(`a', 98, 50)
			}

			*Weapon Offenses: Possession of a firearm or powerful explosive (08), Possession of a weapon (excluding firearms/explosives) (09)
			gen weapon_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace weapon_offense = 1 if inlist(`a', 8, 9)
			}

			*Bullying Offense: Harassment - verbal (25), Hazing (26), Harassment - sexual (38), Bullying (51), Discrimination (99)
			gen bully_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace bully_offense = 1 if inlist(`a', 25, 26, 38, 51, 99)
			}

			*Other Offense: Other school defined offense (52), Other (59)
			gen other_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace other_offense = 1 if inlist(`a', 52, 59)
			}
		}
		****************
		* 2011 - 2018
		****************
		if `y' >= 2011 {
			*Conduct Offenses: Disorderly conduct (22), Honor code violation (28), Dress code violation (31), Inappropriate language/disrespect (32), Insubordination (33), Falsification of information (35), Bus misbehavior (37), Inappropriate items on school property (40), disruptive behavior (42),  Misuse of technology (91), Being in an unauthorized area (59), Cell phone use (60), Disrespect to faculty/staff (61), Excessive display of affection (63), Mutual sexual contact between two students (68), Use of counterfeit items (46), Possession of counterfeit items (47), Inappropriate behavior (114), Indecent Exposure (115)
			* The flag is 1 when any of act1, act2 or act3 carries a conduct
			* code. Looping keeps all three variables on one code list; the
			* hand-copied act3 line tested act1 for the second group of codes.
			gen conduct_offense = 0
			foreach a of varlist act1 act2 act3 {
				replace conduct_offense = 1 if inlist(`a', 22, 28, 31, 32, 33, 35, 37, 40, 42) | inlist(`a', 91, 59, 60, 61, 63, 68, 46, 47, 114) | inlist(`a', 115)
			}
			
			*Truancy Offenses: Truancy (30),  Late to class (78), Excessive tardiness (64), Cutting class (74), Leaving school without permission (66), Leaving class without permission (67), Skipping school (73), Skipping class (75), 
			gen truancy_offense = 0
			replace truancy_offense = 1 if inlist(act1, 30, 78, 64, 74, 66, 67, 73, 75)
			replace truancy_offense = 1 if inlist(act2, 30, 78, 64, 74, 66, 67, 73, 75)
			replace truancy_offense = 1 if inlist(act3, 30, 78, 64, 74, 66, 67, 73, 75)
			
			
			*Assault Offenses: Assault resulting in a serious injury (01), Assault involving the use of a weapon (02), Assault on school personnel not resulting in injury (03), Robbery with a dangerous weapon (10), Robbery without a dangerous weapon (11), Rape (12), Sexual offense (13), Sexual assault not involving rape or sexual offense (14), Taking indecent liberties with a minor (15), Kidnapping (16), Assault on student w/o weapon and not resulting in injury (72), Assault on non-student w/o weapon not resulting in injury (71), Violent assault not resulting in serious injury (90), Assualt - other (45), Assault on student (44), Robbery (93), Robbery with a firearm or explosive (103), Physical attack with a firearm or explosive device (104)
			gen assault_offense = 0
			replace assault_offense = 1 if inlist(act1, 01, 02, 03, 12, 13,14,15, 16, 44) | inlist(act1, 72, 71, 90, 45,10,11,44,93,103) | inlist(act1, 104)
			replace assault_offense = 1 if inlist(act2, 01, 02, 03, 12, 13,14,15, 16, 44) | inlist(act2, 72, 71, 90, 45,10,11,44,93,103) | inlist(act2, 104)
			replace assault_offense = 1 if inlist(act3, 01, 02, 03, 12, 13,14,15, 16, 44) | inlist(act3, 72, 71, 90, 45,10,11,44,93,103) | inlist(act3, 104)
			
			*Fighting/Threatening Offenses: Communicating threats (19), Affray (21), Extortion (23), Fighting (24), Aggressive behavior (27), Bomb threat (43), Gang activity (79), Threat of physical attack with a firearm (105), Threat of physical attack with a weapon (106), Threat of physical attack without a weapon (107)
			gen fighting_offense = 0
			replace fighting_offense = 1 if inlist(act1, 19, 21, 23, 24, 27,43,79, 105,106) | inlist(act1, 107)
			replace fighting_offense = 1 if inlist(act2, 19, 21, 23, 24, 27,43,79, 105,106) | inlist(act2, 107)
			replace fighting_offense = 1 if inlist(act3, 19, 21, 23, 24, 27,43,79, 105,106) | inlist(act3, 107)
			
			*Property Offenses: Unlawfully setting a fire (18), False fire alarm (29), Gambling (34), Theft (36), Property damage (39), Burning of a school building (53), 
			gen property_offense = 0
			replace property_offense = 1 if inlist(act1, 18,29,34,36,39,53)
			replace property_offense = 1 if inlist(act2, 18,29,34,36,39,53)
			replace property_offense = 1 if inlist(act3, 18,29,34,36,39,53)
			
			*Substance Offenses: Possession of controlled substance - cocaine (05), Possession of controlled substance - marijuana (06), Possession of controlled substance - Ritalin (07), Possession of controlled substance - other (17), Alcohol Possession (20), Possession of tobacco (41), Sale of controlled substance - cocaine (54), Sale of controlled substance - marijuana (55), Sale of controlled substance - Ritalin (56), Sale of controlled substance -other (57), Possession of a prescription drug (76), Distribution of prescription drug (62), Possession of student's own prescription drug (86), Possession of another person's prescription dug (87), Distribtion of a prescription drug (88), Use of controlled substances (49), Use of alcoholic beverages (48), Use of narcotics (50), Possession of chemical or drug paraphernalia (51), Use of tobacco (70), Under the influence of alcohol (95), Under the influence of controlled substances (96)
			gen substance_offense = 0
			replace substance_offense = 1 if inlist(act1, 05,06,07,17,20,41,54,55,56) | inlist(act1, 57,76,62,86,87,88,49,48,50) | inlist(act1, 51, 70, 95,96)
			replace substance_offense = 1 if inlist(act2, 05,06,07,17,20,41,54,55,56) | inlist(act2, 57,76,62,86,87,88,49,48,50) | inlist(act2, 51, 70, 95,96)
			replace substance_offense = 1 if inlist(act3, 05,06,07,17,20,41,54,55,56) | inlist(act3, 57,76,62,86,87,88,49,48,50) | inlist(act3, 51, 70, 95,96)
			
			
			*Weapon Offenses: Possession of a firearm or powerful explosive (08), Possession of a weapon (excluding firearms/explosives) (09)
			gen weapon_offense = 0
			replace weapon_offense = 1 if inlist(act1, 08,09)
			replace weapon_offense = 1 if inlist(act2, 08,09)
			replace weapon_offense = 1 if inlist(act3, 08,09)
			
			*Bullying Offense: Harassment - verbal (25), Hazing (26), Harassment - sexual (38), Bullying (52), Discrimation (80), Cyber-bullying (94), Harassment - Racial (101), Harassment - Disability (102), Harassment - Sexual orientation (109), Harassment - Religious affiliation (110)
			gen bully_offense = 0
			replace bully_offense = 1 if inlist(act1, 25,26,38,52,80, 94, 101,102,109) | inlist(act1, 110)
			replace bully_offense = 1 if inlist(act2, 25,26,38,52,80, 94, 101,102,109) | inlist(act1, 110)
			replace bully_offense = 1 if inlist(act3, 25,26,38,52,80, 94, 101,102,109) | inlist(act1, 110)
			
			*Other Offense: Other school defined offense (58), Other (69)
			gen other_offense = 0
			replace other_offense = 1 if inlist(act1, 58, 69)
			replace other_offense = 1 if inlist(act2, 58, 69)
			replace other_offense = 1 if inlist(act3, 58, 69)
		}
		
		
		* any_offense is 1 when at least one of the nine category indicators equals 1.
		gen any_offense = inlist(1, conduct_offense, assault_offense, fighting_offense, property_offense, weapon_offense, bully_offense, other_offense, truancy_offense, substance_offense)

		* Record the year of the current loop iteration on every observation.
		gen sy = `y'

		* Duplicate every indicator under an _N suffix: after the collapse the
		* un-suffixed variable holds the maximum (ever flagged) and the _N copy
		* holds the sum of the indicator within each by-group.
		foreach cat in conduct assault fighting property weapon bully other truancy substance any {
			gen `cat'_offense_N = `cat'_offense
		}

		* One observation per mastid x lea x schlcode x sy.
		collapse (max) *offense (sum) *offense_N, by(mastid lea schlcode sy)

		ren lea ncerdc_lea
		ren schlcode ncerdc_schlcode

		* Print summary statistics of the collapsed year to the log.
		summarize

		* Hold this year's result in a tempfile so the years can be appended later.
		tempfile temp_disc_`y'
		save `temp_disc_`y''
	}
	* Stack the per-year tempfiles (2008-2018) into a single dataset, then write
	* it to disk once after all years have been appended.
	clear
	forval y = 2008/2018 {
		append using `temp_disc_`y''
	}
	save "$basedata/ncerdc_discipline.dta", replace
}